## Load Packages
## NOTE: rm(list=ls()) removed -- wiping the global environment inside a
## script is discouraged; run the script in a fresh session instead.
pks <- c("dplyr", "dataRetrieval", "rmarkdown",
         "plotly", "DT", "broom", "viridis")
## invisible() suppresses the list of attach results lapply would print
invisible(lapply(pks, library, character.only = TRUE))
These data were copied and pasted out of the TMDL appendix, reformatted into CSV, and imported into R; the dates were parsed into both Date and POSIXct objects, and the result was saved as an .rds object.
## Load in data extracted from 2005 TMDL appendix
## NOTE(review): hard-coded absolute Windows path -- this only runs on the
## author's machine; consider a path relative to the project root.
stateData <- readRDS("c:/users/95218/documents/r/public/long-creek/stateData.rds")
print(stateData)
## dt.DATE dt.POSIX tss turbidity
## 1 1997-01-08 1997-01-08 6.0 11.0
## 2 1997-02-17 1997-02-17 14.0 28.0
## 3 1997-03-12 1997-03-12 5.0 12.0
## 4 1997-04-07 1997-04-07 28.0 40.0
## 5 1997-05-13 1997-05-13 8.0 7.3
## 6 1997-06-04 1997-06-04 16.0 32.0
## 7 1997-07-16 1997-07-16 6.0 17.0
## 8 1997-08-06 1997-08-06 14.0 26.0
## 9 1997-09-04 1997-09-04 5.0 4.2
## 10 1997-10-21 1997-10-21 18.0 37.0
## 11 1997-11-06 1997-11-06 4.0 7.9
## 12 1997-12-09 1997-12-09 5.0 7.7
## 13 1998-01-07 1998-01-07 23.0 75.0
## 14 1998-02-02 1998-02-02 4.0 13.0
## 15 1998-03-04 1998-03-04 1.0 10.0
## 16 1998-04-01 1998-04-01 7.0 6.6
## 17 1998-05-04 1998-05-04 1.0 11.0
## 18 1998-06-03 1998-06-03 7.0 9.6
## 19 1998-07-01 1998-07-01 64.0 120.0
## 20 1998-08-24 1998-08-24 4.0 5.8
## 21 1998-09-08 1998-09-08 3.0 8.8
## 22 1998-10-28 1998-10-28 1.0 2.9
## 23 1998-11-16 1998-11-16 11.0 35.0
## 24 1998-12-21 1998-12-21 3.0 14.0
## 25 1999-01-06 1999-01-06 9.0 27.0
## 26 1999-02-08 1999-02-08 3.0 10.0
## 27 1999-03-10 1999-03-10 7.0 23.0
## 28 1999-04-12 1999-04-12 4.0 4.9
## 29 1999-05-12 1999-05-12 5.0 9.3
## 30 1999-06-10 1999-06-10 8.0 22.0
## 31 1999-07-13 1999-07-13 28.0 43.0
## 32 1999-08-10 1999-08-10 3.0 11.0
## 33 1999-09-01 1999-09-01 1.0 5.6
## 34 1999-10-06 1999-10-06 1.0 20.0
## 35 1999-11-08 1999-11-08 1.0 45.0
## 36 1999-12-13 1999-12-13 2.0 18.0
## 37 2000-01-20 2000-01-20 61.0 95.0
## 38 2000-02-02 2000-02-02 2.0 50.0
## 39 2000-03-09 2000-03-09 2.0 4.7
## 40 2000-04-05 2000-04-05 4.0 13.0
## 41 2000-05-09 2000-05-09 4.0 10.0
## 42 2000-06-14 2000-06-14 4.0 3.6
## 43 2000-07-06 2000-07-06 5.0 8.5
## 44 2000-08-03 2000-08-03 4.0 11.0
## 45 2000-09-06 2000-09-06 NA 50.0
## 46 2000-10-09 2000-10-09 NA 3.8
## 47 2000-11-08 2000-11-08 2.0 2.0
## 48 2000-12-13 2000-12-13 NA 5.2
## 49 2001-01-11 2001-01-11 NA 13.0
## 50 2001-02-08 2001-02-08 1.0 4.7
## 51 2001-04-26 2001-04-26 NA 18.0
## 52 2001-05-16 2001-05-16 5.0 4.6
## 53 2001-06-07 2001-06-07 NA 8.3
## 54 2001-07-02 2001-07-02 NA 100.0
## 55 2001-08-21 2001-08-21 3.0 4.9
## 56 2001-09-17 2001-09-17 NA 220.0
## 57 2001-10-18 2001-10-18 NA 14.0
## 58 2001-11-15 2001-11-15 2.5 3.8
## 59 2001-12-05 2001-12-05 NA 4.5
## 60 2002-01-09 2002-01-09 NA 21.0
## 61 2002-02-20 2002-02-20 2.5 4.5
## 62 2002-03-11 2002-03-11 NA 5.2
## 63 2002-04-10 2002-04-10 NA 4.8
## 64 2002-05-16 2002-05-16 9.0 28.0
## 65 2002-06-06 2002-06-06 NA 75.0
## 66 2002-07-09 2002-07-09 NA 12.0
## 67 2002-08-08 2002-08-08 20.0 22.0
## 68 2002-09-11 2002-09-11 NA 9.5
## 69 2002-10-01 2002-10-01 NA 9.8
## 70 2002-11-06 2002-11-06 76.0 160.0
## 71 2002-12-30 2002-12-30 NA 13.0
## 72 2003-01-22 2003-01-22 NA 5.8
## 73 2003-02-11 2003-02-11 6.0 16.0
## 74 2003-03-13 2003-03-13 NA 9.6
## 75 2003-04-03 2003-04-03 NA 14.0
## 76 2003-05-22 2003-05-22 21.0 170.0
## 77 2003-06-10 2003-06-10 NA 24.0
## 78 2003-07-10 2003-07-10 NA 120.0
## 79 2003-08-05 2003-08-05 22.0 160.0
## 80 2003-09-11 2003-09-11 NA 4.5
## 81 2003-10-02 2003-10-02 NA 5.2
## 82 2003-11-04 2003-11-04 4.0 2.9
## 83 2003-12-04 2003-12-04 NA 4.0
## 84 2004-01-14 2004-01-14 NA 9.0
## 85 2004-02-09 2004-02-09 21.0 40.0
## 86 2004-03-03 2004-03-03 NA 28.0
Use dataRetrieval to get historical daily flow data for MC10.
## Daily mean discharge (parameter 00060, statistic 00003) for USGS gage 02142900
usgsFlow <- readNWISdv(siteNumbers="02142900", parameterCd="00060",
                       startDate="1965-01-01", endDate="2004-12-01")
## Keep the date and discharge columns by NAME rather than position --
## positional indexing ([,3:4]) breaks if dataRetrieval reorders columns
usgsFlow <- usgsFlow[, c("Date", "X_00060_00003")]
names(usgsFlow) <- c("dt.DATE", "cfs")
head(usgsFlow)
## dt.DATE cfs
## 1 1965-06-01 4.3
## 2 1965-06-02 4.0
## 3 1965-06-03 4.0
## 4 1965-06-04 4.7
## 5 1965-06-05 4.6
## 6 1965-06-06 4.0
Attach the daily flow rates to the dataset.
## Left join: attach that day's mean flow (cfs) to each sample date;
## sample dates with no flow record get NA cfs
stateData <- merge(stateData, usgsFlow, by="dt.DATE", all.x=TRUE, all.y=FALSE)
Recreate figure 15, a time series of daily flow rates plotted with the TSS and Turbidity samples used in the TMDL, and with a 50 NTU reference line.
## subset flow for plotting (string dates are coerced to Date by Ops.Date)
flow <- filter(usgsFlow, dt.DATE > "1997-01-01",
               dt.DATE < "2004-05-01")
## Figure 15: flow on the secondary y axis, samples and 50 NTU line on the primary
plot1 <- plot_ly() %>%
  add_trace(data=flow, x=~dt.DATE, y=~cfs,
            type="scatter", mode="lines", name="Flow",
            yaxis="y2") %>%
  add_markers(data=stateData, x=~dt.DATE, y=~tss,
              type="scatter", mode="markers", name="TSS") %>%
  add_markers(data=stateData, x=~dt.DATE, y=~turbidity,
              type="scatter", mode="markers", name="Turbidity") %>%
  ## 50 NTU reference line spanning the sample period
  add_trace(x=c(min(stateData$dt.DATE), max(stateData$dt.DATE)),
            y=c(50, 50), type="scatter", mode="lines", name="Standard") %>%
  ## BUG FIX: xaxis was nested INSIDE the yaxis list, so the x-axis title
  ## was never applied; it must be a separate layout() argument
  layout(xaxis=list(title="Date"),
         yaxis=list(title="TSS or Turbidity",
                    range=c(0, 500)),
         yaxis2=list(overlaying = "y", side = "right",
                     title = "Flow (cfs)",
                     showgrid=FALSE, range=c(0, 1200)),
         legend=list(x=.2, y=100, orientation='h'))
plot1
## Warning: Ignoring 28 observations
Here we see a slight difference. The TMDL Table counts 21 “moist” samples and 16 “mid-range”. My accounting has 23 “moist” samples and 14 “mid-range”. It is possible that USGS updated historical records in some way since the State did their analysis; possible the State did something wrong; possible I did something wrong. Not really a big deal as the distinction between moist and mid doesn’t have much regulatory importance.
## Function to create Table 5 (de-duplicated from the original verbose version)
returnExcursionsState <- function(chem, flow) {
  ## Recreate TMDL Table 5: bin samples by flow regime (flow-duration
  ## percentile of the long-term record) and count 50 NTU excursions.
  ##
  ## chem: data.frame with columns cfs and turbidity (one row per sample)
  ## flow: data.frame with column cfs (long-term daily flow record)
  ## returns: data.frame with rows high/moist/mid/dry/low and columns
  ##          Percentile, Flow Rate, # Samples, # Exc.
  p <- c(high = 0.90, moist = 0.60, mid = 0.40, dry = 0.05, low = 0)
  ## Compute each breakpoint once (the original recomputed quantile() 18x)
  qs <- unname(quantile(flow$cfs, p))
  ## Bin i holds samples with lower[i] < cfs <= upper[i]; the top bin is
  ## open above and the bottom bin is open below, matching the original
  upper <- c(Inf, qs[-length(qs)])
  lower <- qs
  lower[length(lower)] <- -Inf
  rows <- lapply(seq_along(p), function(i) {
    ## which() drops NA cfs rows, as dplyr::filter() did
    idx <- which(chem$cfs > lower[i] & chem$cfs <= upper[i])
    c(p[[i]] * 100,                                 # percentile label
      qs[i],                                        # flow at that percentile
      length(idx),                                  # samples in bin
      sum(chem$turbidity[idx] > 50, na.rm = TRUE))  # 50 NTU excursions
  })
  returnTable <- as.data.frame(do.call(rbind, rows))
  rownames(returnTable) <- names(p)
  names(returnTable) <- c("Percentile","Flow Rate","# Samples","# Exc.")
  returnTable
}
## Render Table 5 as an interactive datatable
DT::datatable(returnExcursionsState(chem=stateData, flow=usgsFlow))
Since the State had many turbidity samples without matching TSS, and wanted to base the TMDL on TSS, they developed this relationship. I neglect regression diagnostics (because they did!), but get essentially the same relationship they did. So far so good.
But, weird that this dataset suggests 50 NTU is equivalent to 17 mg/L TSS. Most of our data sets put that number at ~45 mg/L TSS.
## Scatter of TSS vs turbidity, basis for the regression figure
## BUG FIX: removed the trailing comma after mode="markers" -- it creates an
## empty argument, which errors when the dots are evaluated
plot2 <- plot_ly() %>%
  add_markers(data=stateData, x=~turbidity, y=~tss,
              type="scatter", mode="markers") %>%
  layout(yaxis=list(title="TSS (mg/L)"), xaxis=list(title="Turbidity (NTU)"))
## if (input$logSedSt==TRUE) {
## tsP <- layout(tsP, yaxis=list(
## title="TSS",
## type="log"))
## } else {
## tsP <- layout(tsP, yaxis=list(
## title="TSS",
## type= "linear"))
## }
## if (input$logTurbSt==TRUE) {
## tsP <- layout(tsP, xaxis=list(
## title="Turbidity",
## type= "log"))
## } else {
## tsP <- layout(tsP, xaxis=list(
## title="Turbidity",
## type= "linear"))
## }
## Linear regression TSS ~ turbidity (lm drops the rows with missing tss)
lm1 <- lm(tss~turbidity, data=stateData)
## Sort by turbidity ONCE so the fitted line draws left-to-right
## (the original recomputed order() twice); fitted() avoids the
## partial-matched lm1$fitted
ord <- order(lm1$model[["turbidity"]])
plot2 <- add_trace(plot2, x=lm1$model[["turbidity"]][ord],
                   y=fitted(lm1)[ord],
                   type="scatter", mode="lines", name="Lin. Regr.")
plot2
## Warning: Ignoring 28 observations
## Tidy summaries of the fit: td holds coefficients, gl holds fit statistics
td <- tidy(lm1)
gl <- glance(lm1)
## Report the fitted equation and the TSS equivalent of the 50 NTU standard
slope <- td$estimate[2]
intercept <- td$estimate[1]
print(sprintf(
  "Equation is: TSS = %s * TURB + %s . R Squared = %s . Given this regression, the 50 NTU standards equates to %s mg/L TSS.",
  format(slope, digits=3),
  format(intercept, digits=3),
  format(gl$r.squared, digits=3),
  format(slope*50+intercept, digits=4)
))
## [1] "Equation is: TSS = 0.298 * TURB + 2.31 . R Squared = 0.589 . Given this regression, the 50 NTU standards equates to 17.22 mg/L TSS."
Okay.
The load duration curve is just the flow duration curve multiplied by a concentration to turn it into a load. So first take the long term flow record from USGS and construct a FDC in the usual way.
Next, figure out what to multiply by. For the explicit 10% MOS in this TMDL, they say we need to be under 45 NTU. Using the regression equation from above, that is equated with 15.72 mg/L TSS. So below, fdcData$load45 is the FDC (first term) times the 15.72 TSS conc (last term), times some unit conversions (middle) to get into lbs/day. That covers the line.
For the sample points, each concentration needs to be converted to a lbs/day (y axis) and assigned an exceedance probability (“qi”, x-axis). So for the TSS samples, multiply conc. by that day’s flow and convert units to get lbs for that day. Then, following the ‘merge’ with the FDC data, every sample’s flow rate will already be matched with its ‘qi’. So we have what we need to scatter plot TSS over the LDC.
For Turbidity samples with no matching TSS, first predict TSS via the previously developed regression equation. Then use the same approach to calculate lbs/day and scatter plot those samples.
Lastly, create $allTSS by combining the TSS and Turbidity based daily loads into a single data set for plotting and subsequent regression (but not used in this figure).
## FDC prep calcs: rank the full flow record and compute the exceedance
## probability qi = percent of days with flow at or above this value
fdcData <- usgsFlow
fdcData <- fdcData[order(fdcData$cfs, decreasing=FALSE), ]
fdcData$rank <- seq_len(nrow(fdcData))   # seq_len is safe for 0-row inputs
fdcData$qi <- (1-(fdcData$rank/(nrow(fdcData)+1)))*100
## lbs/day per (cfs * mg/L):
##   86400 s/day * 28.32 L/ft^3 / (1000 mg/g * 453.59 g/lb)
cfs_mgL_to_lbsday <- (60*60*24) * 28.32 / (1000*453.59)
## 45 NTU limit as a load, using its 15.72 mg/L TSS regression equivalent
fdcData$load45 <- fdcData$cfs * cfs_mgL_to_lbsday * 15.72
## Compute sample loads to plot as points
ldcData <- stateData
## Attach longterm qi to each sample date (left join; unmatched dates -> NA)
ldcData <- merge(ldcData, fdcData[,c("dt.DATE", "qi")],
                 by="dt.DATE", all.x=TRUE, all.y=FALSE)
## TSS predicted from turbidity (regression coefficients from lm1 above),
## filled only where the measured TSS is missing; NA elsewhere
ldcData$predTSS <- ifelse(is.na(ldcData$tss),
                          0.298*ldcData$turbidity + 2.31,
                          NA_real_)
## Combine measured and estimated TSS into one column.
## BUG FIX: the old rowSums(..., na.rm=TRUE) silently turned a row with BOTH
## values missing into 0; keep such rows NA instead
ldcData$allTSS <- ifelse(is.na(ldcData$tss), ldcData$predTSS, ldcData$tss)
## Concentration (mg/L) -> daily load (lbs/day):
##   cfs * 86400 s/day * 28.32 L/ft^3 / (1000 mg/g * 453.59 g/lb)
ldcData$Turb2Load <- ldcData$cfs*(60*60*24)*(28.32)*(1/(1000*453.59))*ldcData$predTSS
ldcData$TSS2Load <- ldcData$cfs*(60*60*24)*(28.32)*(1/(1000*453.59))*ldcData$tss
## LDC figure: the 45 NTU limit curve with the sample loads scattered over it
plot3 <- plot_ly() %>%
add_trace(data=fdcData, x=~qi, y=~load45,
type="scatter", mode="lines",
name="45 NTU Limit Curve") %>%
## loads estimated from turbidity (samples lacking a TSS measurement)
add_trace(data=ldcData, x=~qi, y=~Turb2Load,
type="scatter", mode="markers",
name="Turb Est.") %>%
## loads computed from measured TSS
add_trace(data=ldcData, x=~qi, y=~TSS2Load,
type="scatter", mode="markers",
name="TSS Obs.") %>%
layout(yaxis=list(type="log", title="Load (lbs/day)"))
plot3
## Warning: Ignoring 58 observations
## Warning: Ignoring 28 observations
Recreate TMDL Table 6. Note: the State counts only 12 exceedances, but I get 15. This is curious.
returnTSSExcursions1 <- function(chem, flow, thresh) {
  ## Recreate TMDL Table 6: bin samples by flow regime and count samples
  ## whose combined TSS (measured or turbidity-estimated) exceeds thresh.
  ##
  ## chem: data.frame with columns cfs and allTSS (one row per sample)
  ## flow: data.frame with column cfs (long-term daily flow record)
  ## thresh: TSS threshold in mg/L
  ## returns: data.frame with rows high/moist/mid/dry/low and columns
  ##          Percentile, Flow Rate, # Samples, # Exc.
  p <- c(high = 0.90, moist = 0.60, mid = 0.40, dry = 0.05, low = 0)
  ## Compute each breakpoint once (the original recomputed quantile() 18x)
  qs <- unname(quantile(flow$cfs, p))
  ## Bin i holds samples with lower[i] < cfs <= upper[i]; the top bin is
  ## open above and the bottom bin is open below, matching the original
  upper <- c(Inf, qs[-length(qs)])
  lower <- qs
  lower[length(lower)] <- -Inf
  rows <- lapply(seq_along(p), function(i) {
    ## which() drops NA cfs rows, as dplyr::filter() did
    idx <- which(chem$cfs > lower[i] & chem$cfs <= upper[i])
    c(p[[i]] * 100, qs[i], length(idx),
      sum(chem$allTSS[idx] > thresh, na.rm = TRUE))
  })
  returnTable <- as.data.frame(do.call(rbind, rows))
  rownames(returnTable) <- names(p)
  names(returnTable) <- c("Percentile", "Flow Rate", "# Samples", "# Exc.")
  returnTable
}
## Render Table 6; thresh = 17.22 mg/L is the TSS equivalent of 50 NTU
## per the regression above
DT::datatable(returnTSSExcursions1(chem=ldcData[,c("dt.DATE","allTSS","cfs")],
flow=usgsFlow, thresh=17.22))
Looking at the high flow range, here are the data for the 4 high flow samples:
## Show the 4 highest-flow samples (the Table 5 "high" bin)
print(tail(stateData[order(stateData$cfs),], 4))
## dt.DATE dt.POSIX tss turbidity cfs
## 70 2002-11-06 2002-11-06 76 160 49.5
## 79 2003-08-05 2003-08-05 22 160 77.6
## 13 1998-01-07 1998-01-07 23 75 89.0
## 76 2003-05-22 2003-05-22 21 170 1190.0
In table 5, it was noted that of the 4 high flow samples, all 4 were 50 NTU exceedances. Table 6 from the State tells us that only 1 of those 4 exceeded 17 mg/L TSS. This doesn’t match up with the data…..
This is just a monthly summary of the number of NTU violations in the period of analysis, as presented in the TMDL. Things match up fine.
returnTSSExcursions2 <- function(chem, flow, thresh) {
  ## Monthly count of turbidity excursions (turbidity > thresh NTU).
  ##
  ## chem: data.frame with columns dt.DATE (Date) and turbidity
  ## flow: unused; kept so the signature matches the sibling functions
  ## thresh: turbidity threshold in NTU
  ## returns: 1-row data.frame with one integer column per month (Jan..Dec),
  ##          row name "Exceedences" (original spelling kept for output
  ##          compatibility)
  ## Explicit NA guard matches dplyr::filter(), which drops NA rows
  exc <- chem[!is.na(chem$turbidity) & chem$turbidity > thresh, , drop = FALSE]
  ## months() returns full (locale-dependent) names; pinning the factor
  ## levels to month.name makes every month appear even with zero excursions
  ## (replaces the original 12 copy-pasted length(which()) calls)
  mon <- factor(months(exc$dt.DATE), levels = month.name)
  counts <- as.integer(table(mon))
  monthlyTable <- as.data.frame(as.list(setNames(counts, month.abb)),
                                row.names = "Exceedences")
  monthlyTable
}
## Render the monthly 50 NTU excursion counts (these match the TMDL)
DT::datatable(returnTSSExcursions2(chem=ldcData[,c("dt.DATE","turbidity","cfs")],
flow=usgsFlow, thresh=50))
Here’s where I lose what they are doing….
To start with, Figure 18 takes the untransformed data and plots it in excel on a log y axis. Then fits an exponential curve for the figure. The statistical analysis in Appendix H of the TMDL does a linear fit to ln() transformed data. These are basically equivalent, except for differences in curve fitting algorithms between programs (if different programs were used…).
My confusion is (1) how they implemented the bias correction and (2) how they got the results that they did.
Step one is to fit a regression to ln transformed loads. That is ‘lm2’ below. The subsetting ‘[5:76]’ drops the highest 10% and lowest 5% of flows. Then I use predict to get the loads predicted by the model and the prediction interval. They appear to be basing things off an 80th percentile line, which is the same as a 60% PI upper bound under a two tailed approach.
The bias correction calc (‘bStar’) is based off the Gilbert paper cited in Appendix H. That is the exp(MSE/2) value. So at ~1.4, my quick (possibly incorrect) interpretation is that the mean load prediction is ~1.4*load predicted by the model (which is a median value). This could be completely wrong….
I would note that the straight fit ‘Fit’ threads the sample points more or less equivalently to the TMDL figure, without any bias correction. Neither the straight nor the bias-corrected 80th percentile prediction interval approaches what the State has given us.
## LDC prep: order samples by exceedance probability
ldcData <- ldcData[order(ldcData$qi),]
## Combined tss/turb load column. By construction exactly one of TSS2Load /
## Turb2Load is non-NA per row when cfs is present.
## BUG FIX: rowSums(..., na.rm=TRUE) mapped rows where BOTH loads are NA to
## 0, and log(0) = -Inf would poison the regression below; keep them NA so
## lm() simply drops those rows
ldcData$allLoad <- ifelse(is.na(ldcData$TSS2Load),
                          ldcData$Turb2Load,
                          ldcData$TSS2Load)
## Regression of ln(load) on exceedance probability; rows 5:76 drop the
## highest ~10% and lowest ~5% of flows, mirroring the TMDL Appendix H fit
lm2 <- lm(log(allLoad)~qi, data=ldcData[5:76,])
newx <- data.frame(qi=c(95, 10))
## 60% two-sided prediction interval: its upper bound is the 80th
## percentile line the State appears to use
pred2 <- predict(lm2, newdata=newx, interval="prediction", level=0.60)
preddf2 <- as.data.frame(pred2)
predCI <- predict(lm2, newdata=newx, interval="confidence", level=0.60)
preddfCI <- as.data.frame(predCI)
sm2 <- summary(lm2)
## Gilbert-style bias correction for back-transforming the log-space fit:
## exp(MSE/2). NOTE(review): mean(residuals^2) divides by n rather than the
## usual n - 2 -- confirm against the Gilbert reference
bStar <- exp(mean(sm2$residuals^2)/2)
## Attempted recreation of TMDL Figure 18: limit curve, sample loads,
## regression fit, and (bias-corrected) 80th percentile prediction lines
plot5 <- plot_ly() %>%
## Plot 45 NTU Limit Curve
add_trace(data=fdcData, x=~qi, y=~load45,
type="scatter", mode="lines",
name="45 NTU Limit Curve",
line=list(color=viridis(8)[1])) %>%
## Plot sample sets, TSS, Turb, Combined
add_trace(data=ldcData, x=~qi, y=~Turb2Load,
type="scatter", mode="markers",
name="Turb Pred.") %>%
add_trace(data=ldcData, x=~qi, y=~TSS2Load,
type="scatter", mode="markers",
name="TSS") %>%
add_trace(data=ldcData, x=~qi, y=~allLoad,
type="scatter", mode="markers", name="Comb",
marker=list(color=viridis(8)[4])) %>%
## Plot fit line and crack at bias corrected fit line
## (exp() back-transforms the log-space predictions; bStar is the
## Gilbert-style correction computed above)
add_trace(x=newx$qi, y=exp(preddf2$fit),
type="scatter", mode="lines", name="Fit",
line = list(color = 'rgb(0, 0, 0)')) %>%
add_trace(x=newx$qi, y=exp(preddf2$fit)*bStar,
type="scatter", mode="lines", name="Bias Corrected Fit",
line = list(color = viridis(8)[6])) %>%
## Plot upper PI (upper bound of the 60% two-sided PI = 80th percentile)
add_trace(x=c(95, 10), y=exp(preddf2$upr), type="scatter",
mode="lines", name="Upper PI",
line = list(color = 'rgb(0, 0, 0)', width = 2, dash = 'dot')) %>%
add_trace(x=c(95, 10), y=exp(preddf2$upr)*bStar, type="scatter",
mode="lines", name="Bias Corrected Upper PI",
line = list(color = viridis(8)[6], width = 2, dash = 'dot')) %>%
## Retained for exploration: lower PI and the 60% CI bounds
## add_trace(x=c(95, 10), y=exp(preddf2$lwr), type="scatter",
## mode="lines", name="Lower PI") %>%
## add_trace(x=c(95, 10), y=exp(preddfCI$upr), type="scatter",
## mode="lines", name="Upper CI") %>%
## add_trace(x=c(95, 10), y=exp(preddfCI$lwr), type="scatter",
## mode="lines", name="Lower CI") %>%
layout(yaxis=list(type="log"),
legend=list(x=.1, y=100, orientation='h'))
plot5
## Warning: Ignoring 58 observations
## Warning: Ignoring 28 observations
Without a correct version of the figure 18 analysis, I can’t reproduce Table 8, upon which the requirements are based.